{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# <div align='center'> Pandas之数据定位 </div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 参考　https://stackoverflow.com/questions/28757389/pandas-loc-vs-iloc-vs-ix-vs-at-vs-iat\n",
    "\n",
    "df = pd.DataFrame({'age':[30, 2, 12, 4, 32, 33, 69],\n",
    "                   'color':['blue', 'green', 'red', 'white', 'gray', 'black', 'red'],\n",
    "                   'food':['Steak', 'Lamb', 'Mango', 'Apple', 'Cheese', 'Melon', 'Beans'],\n",
    "                   'height':[165, 70, 120, 80, 180, 172, 150],\n",
    "                   'score':[4.6, 8.3, 9.0, 3.3, 1.8, 9.5, 2.2],\n",
    "                   'state':['NY', 'TX', 'FL', 'AL', 'AK', 'TX', 'TX']\n",
    "                   },\n",
    "                  index=['Jane', 'Nick', 'Aaron', 'Penelope', 'Dean', 'Christina', 'Cornelia'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# loc 通过行标签(labels)定位"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 一个标签"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss_Penelope = df.loc['Penelope']\n",
    "ss_Penelope "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 多个标签"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_Jane_Nick = df.loc[['Jane', 'Nick']]\n",
    "df_Jane_Nick "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 标签切片选取（slice)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_Aaron_to_Dean = df.loc['Aaron':'Dean']\n",
    "df_Aaron_to_Dean "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 行标签+列标签"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_age_score_of_Jane_Nick = df.loc[['Jane', 'Nick'], ['age', 'score']]\n",
    "df_age_score_of_Jane_Nick "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 区别\n",
    "\n",
    "[参考](http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.loc['Jane', 'age'] # 同一块内存上操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.loc['Jane']['age'] # 内存有copy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "-------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# iloc 通过行索引(index)定位"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 一个索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss_Penelope = df.iloc[3]\n",
    "ss_Penelope "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 多个索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_Jane_Nick = df.iloc[[0, 1]]\n",
    "df_Jane_Nick "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 索引切片选取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_Aaron_to_Dean = df.iloc[2:-2]\n",
    "df_Aaron_to_Dean "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 行索引+列索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_age_score_of_Jane_Nick = df.iloc[[0,1], [0,4]]\n",
    "df_age_score_of_Jane_Nick"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 标签(labels) 和 索引(index)之间互转换"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 行索引 -> 行标签"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "row_names = df.index[[1, 3]]\n",
    "row_names "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 行标签 -> 行引用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "row_idxes = [df.index.get_loc(row) for row in row_names]\n",
    "row_idxes "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 列索引 -> 列标签"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "col_names = df.columns[[2, 4]]\n",
    "col_names "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 列标签 -> 列索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "col_idxes = [df.columns.get_loc(col) for col in col_names]\n",
    "col_idxes "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 掩码定位"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss_age_bt_30 = df.age > 30\n",
    "ss_age_bt_30 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(ss_age_bt_30.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.loc[df.age > 30]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.loc[ss_age_bt_30]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.loc[ss_age_bt_30.values]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# at 和 iat 选取\n",
    "\n",
    "和 loc, iloc类似, 只能选取单一标量, 性能好些"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.at['Dean', 'age']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.iat[4, 0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 其他"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 选取age列: case1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['age'] # or df.age"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 选取age列: case2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.age"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 选取age列: case3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.iloc[:, 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[0:2]['age']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[0:2]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
